home *** CD-ROM | disk | FTP | other *** search
- /* WIDE AREA INFORMATION SERVER SOFTWARE:
- No guarantees or restrictions. See the readme file for the full standard
- disclaimer.
- */
-
- /* Copyright (c) CNIDR (see ../COPYRIGHT) */
-
-
- /* this is a set of utilities for maintaining a stoplist
- * kind of a kludge.
- *
- * -brewster
- */
-
- #ifndef lint
- static char *RCSid = "$Header: /usr/users/freewais/FreeWAIS-0.1/ir/stoplist.c,v 1.1 1993/02/16 15:05:35 freewais Exp $";
- #endif
-
- /* Change log:
- * $Log: stoplist.c,v $
- * Revision 1.1 1993/02/16 15:05:35 freewais
- * Initial revision
- *
- * Revision 1.2 92/02/12 13:50:01 jonathan
- * Added "$Log" so RCS will put the log message in the header
- *
- *
- */
-
- #include "stoplist.h"
- #include "cutil.h"
- #include <string.h>
-
- static char stoplist_filename[MAX_FILENAME_LEN];/* dgg */
- long stoplist_pointer = 0;
/* Built-in stop words: all lower case, laid out here by first letter and
 * terminated by a NULL sentinel.  The empty string that follows "found"
 * is an entry of the original table and is kept in place.
 */
char *stoplist[] = {
  /* a */
  "a", "about", "above", "according", "across", "actually", "adj", "after",
  "afterwards", "again", "against", "all", "almost", "alone", "along",
  "already", "also", "although", "always", "among", "amongst", "an", "and",
  "another", "any", "anyhow", "anyone", "anything", "anywhere", "are",
  "aren't", "around", "as", "at",
  /* b */
  "b", "be", "became", "because", "become", "becomes", "becoming", "been",
  "before", "beforehand", "begin", "beginning", "behind", "being", "below",
  "beside", "besides", "between", "beyond", "billion", "both", "but", "by",
  /* c */
  "c", "can", "can't", "cannot", "caption", "co", "co.", "could", "couldn't",
  /* d */
  "d", "did", "didn't", "do", "does", "doesn't", "don't", "down", "during",
  /* e */
  "e", "each", "eg", "eight", "eighty", "either", "else", "elsewhere", "end",
  "ending", "enough", "etc", "even", "ever", "every", "everyone",
  "everything", "everywhere", "except",
  /* f */
  "f", "few", "fifty", "first", "five", "for", "former", "formerly", "forty",
  "found", "", "four", "from", "further",
  /* g */
  "g",
  /* h */
  "h", "had", "has", "hasn't", "have", "haven't", "he", "he'd", "he'll",
  "he's", "hence", "her", "here", "here's", "hereafter", "hereby", "herein",
  "hereupon", "hers", "herself", "him", "himself", "his", "how", "however",
  "hundred",
  /* i */
  "i", "i'd", "i'll", "i'm", "i've", "ie", "if", "in", "inc.", "indeed",
  "instead", "into", "is", "isn't", "it", "it's", "its", "itself",
  /* j, k */
  "j",
  "k",
  /* l */
  "l", "last", "later", "latter", "latterly", "least", "less", "let", "let's",
  "like", "likely", "ltd",
  /* m */
  "m", "made", "make", "makes", "many", "maybe", "me", "meantime",
  "meanwhile", "might", "million", "miss", "more", "moreover", "most",
  "mostly", "mr", "mrs", "much", "must", "my", "myself",
  /* n */
  "n", "namely", "neither", "never", "nevertheless", "next", "nine",
  "ninety", "no", "nobody", "none", "nonetheless", "noone", "nor", "not",
  "nothing", "now", "nowhere",
  /* o */
  "o", "of", "off", "often", "on", "once", "one", "one's", "only", "onto",
  "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out",
  "over", "overall", "own",
  /* p, q, r */
  "p", "per", "perhaps",
  "q",
  "r", "rather", "recent", "recently",
  /* s */
  "s", "same", "seem", "seemed", "seeming", "seems", "seven", "seventy",
  "several", "she", "she'd", "she'll", "she's", "should", "shouldn't",
  "since", "six", "sixty", "so", "some", "somehow", "someone", "something",
  "sometime", "sometimes", "somewhere", "still", "stop", "such",
  /* t */
  "t", "taking", "ten", "than", "that", "that'll", "that's", "that've",
  "the", "their", "them", "themselves", "then", "thence", "there",
  "there'd", "there'll", "there're", "there's", "there've", "thereafter",
  "thereby", "therefore", "therein", "thereupon", "these", "they", "they'd",
  "they'll", "they're", "they've", "thirty", "this", "those", "though",
  "thousand", "three", "through", "throughout", "thru", "thus", "to",
  "together", "too", "toward", "towards", "trillion", "twenty", "two",
  /* u */
  "u", "under", "unless", "unlike", "unlikely", "until", "up", "upon", "us",
  "used", "using",
  /* v */
  "v", "very", "via",
  /* w */
  "w", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "well",
  "were", "weren't", "what", "what'll", "what's", "what've", "whatever",
  "when", "whence", "whenever", "where", "where's", "whereafter", "whereas",
  "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while",
  "whither", "who", "who'd", "who'll", "who's", "whoever", "whole", "whom",
  "whomever", "whose", "why", "will", "with", "within", "without", "won't",
  "would", "wouldn't",
  /* x, y, z */
  "x",
  "y", "yes", "yet", "you", "you'd", "you'll", "you're", "you've", "your",
  "yours", "yourself", "yourselves",
  "z",
  NULL				/* end-of-list sentinel */
};
-
/* Element type of the stop-word arrays: a pointer to a C string. */
typedef char *charptr;

/* Number of slots in stop_from_file, counting the trailing NULL slot;
   0 while no file-based list is loaded. */
long nstops = 0;

/* Stop words read from the stop-list file, or NULL if none.  -- dgg */
char **stop_from_file = NULL;

/* The list next_stop_word() walks; chosen by init_stop_list().  -- dgg */
char **stop_ptr = NULL;
-
- void stop_list_file (filename)
- char *filename;
- {
- strcpy (stoplist_filename, filename);
- }
-
- void init_stop_list () {
- #ifdef BIO
- if (nstops > 0 && stop_from_file != NULL) {/* dgg */
- int i;
- for (i = 0; i < nstops; i++)
- free (stop_from_file[i]);
- free (stop_from_file);
- nstops = 0;
- }
- /* printf("init_stop_list: stoplist file is '%s'\n", stoplist_filename); */
- if (stoplist_filename[0] != '\0') {
- char word[256];
- FILE * fp;
- fp = fopen (stoplist_filename, "r");
- nstops = 1;
- (void *) stop_from_file = malloc (nstops * sizeof (charptr));
- while (fgets (word, 255, fp)) {
- char *cp = strstr (word, "\n");
- if (cp != NULL)
- *cp = '\0';
- stop_from_file[nstops - 1] = malloc (strlen (word) + 1);
- strcpy (stop_from_file[nstops - 1], word);
- /* printf("init_stop_list: word %d is '%s'\n", nstops, stop_from_file[nstops-1]); */
- nstops++;
- (void *) stop_from_file = realloc (stop_from_file, nstops * sizeof (charptr));
- }
- fclose (fp);
- stop_from_file[nstops - 1] = NULL;
- stop_ptr = stop_from_file;
- /* printf("init_stop_list: read %d stop words from '%s'\n", nstops-1, stoplist_filename); */
- }
- else
- #endif
- stop_ptr = stoplist;
-
- stoplist_pointer = 0;
- }
-
-
- char *next_stop_word () {
- /* returns NULL if all out */
- if (stop_ptr[stoplist_pointer] == NULL)
- return (NULL);
- else
- return (stop_ptr[stoplist_pointer++]);
- }
-
-
-